# NOTE(review): rm(list = ls()) in scripts is discouraged (it only clears the
# global environment and hides reproducibility problems); prefer restarting
# the R session. Kept here because the rest of the script assumes a clean
# workspace -- confirm before removing.
rm(list = ls())

# Load the prepared `alumni` data set and give column 4 a descriptive name.
load("alumni.Rdata")
names(alumni)[4] <- "HABITAT"
head(alumni)

# Draw a random subsample of 4000 pupils without replacement.
# NOTE(review): no set.seed() call, so the subsample (and everything computed
# from it) differs between runs -- add a seed for reproducibility.
alumni <- alumni[sample(seq_len(nrow(alumni)), size = 4000, replace = FALSE), ]
dim(alumni)
## [1] 4000 10
#alumni$ANY = alumni$ANY - min(alumni$ANY)
# for(i in 0:2){
# indices = which(alumni$ANY == i)
# alumni$PCAT[indices] = scale(alumni$PCAT[indices], center = T, scale = F)
# alumni$PCAST[indices] = scale(alumni$PCAST[indices], center = T, scale = F)
# alumni$PANG[indices] = scale(alumni$PANG[indices], center = T, scale = F)
# alumni$PMAT[indices] = scale(alumni$PMAT[indices], center = T, scale = F)
# alumni$PCIEN[indices] = scale(alumni$PCIEN[indices], center = T, scale = F)
# alumni$PUMAN[indices] = scale(alumni$PUMAN[indices], center = T, scale = F)
# }
library(fastDummies)

# One-hot encode the categorical covariates, dropping the first level of each
# (it becomes the reference category) and removing the original columns.
alumni <- dummy_cols(
  alumni,
  select_columns = c("NATURALESA", "GENERE", "HABITAT"),
  remove_first_dummy = TRUE,
  remove_selected_columns = TRUE
)
head(alumni)

# Separate dummy matrix for the territorial area (later used as a grouped
# effect in model 2, so it is kept out of the main data frame).
areas <- dummy_cols(
  data.frame(Area = alumni$AREA_TERRITORIAL),
  remove_first_dummy = TRUE,
  remove_selected_columns = TRUE
)
names(areas)
## [1] "Area_Barcelona Comarques"
## [2] "Area_Barcelona II (Comarques)"
## [3] "Area_Catalunya Central"
## [4] "Area_Consorci d'Educació de Barcelona"
## [5] "Area_Girona"
## [6] "Area_Lleida"
## [7] "Area_Maresme - Vallès Oriental"
## [8] "Area_Maresme Vallès Oriental"
## [9] "Area_Maresme-Vallès Oriental"
## [10] "Area_Tarragona"
## [11] "Area_Terres de l'Ebre"
## [12] "Area_Vallès Occidental"
# NOTE(review): columns 7-9 above look like three spellings of the same area
# ("Maresme - Vallès Oriental"); the raw AREA_TERRITORIAL levels should be
# cleaned before these dummies are meaningful.

# Number of area dummy columns (dimension of the G effect in model 2).
m <- ncol(areas)
# model = lm(PMAT ~ . - PUMAN + PUMAN:GENERE, data=alumni)
# summary(model)
# plot(function(x){3 - 1*exp(-1*x)}, xlim=c(0,100), xlab="age", ylab="length")
# plot(function(x){3 - 1*exp(-0.1*x)}, xlim=c(0,100), add=TRUE, lty=2)
# plot(function(x){3 - 1*exp(-0.01*x)}, xlim=c(0,100), add=TRUE, lty=2)
# legend("bottomright",c("b2=1","b2=0.1","b2=0.01"), lty=c(1,2,3))
# In the descriptive analysis, check whether the different covariates show:
# - a particular pattern
# - a different variance
# TODO: MAKE PLOTS
# If the Catalan score goes up by one point, is the maths score likely to go up as well?
# The model with habitat should be handled with 2 dummy variables.
# JAGS model 1: Bayesian linear regression of the 6th-grade maths score (y)
# on earlier scores (x1-x5) and school/pupil dummies (x6-x9).
# Reminder: in JAGS, dnorm(mu, tau) is parameterised by PRECISION tau, not
# variance, so dnorm(0, 1) / dnorm(1, 1) are N(., sd = 1) priors -- fairly
# informative given the scale of the scores; TODO(review): confirm intended.
# tau_y gets the conventional vague Gamma(0.001, 0.001) prior; sigma_y is the
# derived residual standard deviation reported in the summaries.
mod.alumni <- "
model {
for (i in 1:n) {
y[i]~ dnorm(b0 + b1*x1[i] + b2*x2[i] + b3*x3[i] + b4*x4[i] + b5*x5[i] + b6*x6[i] + b7*x7[i] + b8*x8[i] + b9*x9[i], tau_y)
}
b0 ~ dnorm(0, 1)
b1 ~ dnorm(1, 1)
b2 ~ dnorm(1, 1)
b3 ~ dnorm(1, 1)
b4 ~ dnorm(1, 1)
b5 ~ dnorm(0, 1)
b6 ~ dnorm(0, 1)
b7 ~ dnorm(0, 1)
b8 ~ dnorm(0, 1)
b9 ~ dnorm(0, 1)
tau_y ~ dgamma(0.001, 0.001)
sigma_y <- 1/sqrt(tau_y)
}
"
# MCMC settings for model 1.
Iter <- 5000
Burn <- 0    # NOTE(review): no burn-in is discarded -- usually some iterations are dropped
Chain <- 2
Thin <- 1    # thinning, to reduce the effect of autocorrelation in the draws

# Data handed to JAGS: response, the nine covariates, and the sample size.
# Backticks are needed for the dummy names containing spaces.
data <- with(alumni, list(
  y = PMAT_6,
  x1 = PMAT_4, x2 = PLENG_4, x3 = PLENG_6, x4 = PANG_4, x5 = PANG_6,
  x6 = NATURALESA_Pública,
  x7 = `HABITAT_Fins a 10000`, x8 = `HABITAT_Més de 100000`,
  x9 = GENERE_H,
  n = nrow(alumni)
))
parameters <- c("b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", "b8", "b9", "sigma_y")

# One initial-value list per chain, with dispersed starting points so that
# Rhat can detect non-convergence.
initials <- list(
  list(b0 = 3, b1 = 1, b2 = 0.1, b3 = 0, b4 = 0, b5 = 0, b6 = 0, b7 = 0,
       b8 = 0, b9 = 0, tau_y = 4),
  list(b0 = 2, b1 = 1.5, b2 = 0.2, b3 = 0, b4 = 1, b5 = 1, b6 = 1, b7 = 1,
       b8 = 1, b9 = 1, tau_y = 2)
)

alumni.sim <- jags(data, inits = initials, parameters.to.save = parameters,
                   n.iter = (Iter * Thin + Burn), n.burnin = Burn,
                   n.thin = Thin, n.chains = Chain,
                   model = textConnection(mod.alumni))
## module glm loaded
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 4000
## Unobserved stochastic nodes: 11
## Total graph size: 47370
##
## Initializing model
traceplot(alumni.sim, mfrow = c(2,2), varname = parameters, col=c("black","red"), ask=F)
print(alumni.sim, digits=2)
## Inference for Bugs model at "4", fit using jags,
## 2 chains, each with 5000 iterations (first 0 discarded)
## n.sims = 10000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 -4.57 0.90 -6.30 -5.16 -4.56 -3.98 -2.87 1
## b1 0.56 0.02 0.53 0.55 0.56 0.57 0.59 1
## b2 0.18 0.02 0.14 0.17 0.18 0.20 0.23 1
## b3 0.01 0.02 -0.04 -0.01 0.01 0.02 0.05 1
## b4 0.35 0.02 0.32 0.34 0.35 0.36 0.38 1
## b5 -0.15 0.02 -0.19 -0.16 -0.15 -0.13 -0.10 1
## b6 -2.69 0.39 -3.45 -2.95 -2.69 -2.43 -1.93 1
## b7 0.50 0.50 -0.46 0.16 0.50 0.84 1.47 1
## b8 -0.72 0.39 -1.47 -0.98 -0.72 -0.45 0.03 1
## b9 4.38 0.38 3.64 4.12 4.38 4.63 5.11 1
## sigma_y 12.13 0.14 11.87 12.04 12.13 12.23 12.40 1
## deviance 31320.06 9.91 31301.59 31313.29 31319.75 31326.53 31340.43 1
## n.eff
## b0 10000
## b1 10000
## b2 5200
## b3 10000
## b4 10000
## b5 10000
## b6 4200
## b7 10000
## b8 10000
## b9 10000
## sigma_y 2100
## deviance 10000
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 49.1 and DIC = 31369.2
## DIC is an estimate of expected predictive error (lower deviance is better).
# Plot the 95% credible interval bounds for every parameter except the
# deviance: blue = 2.5% quantile, red = 97.5% quantile. Intervals straddling
# the dashed zero line indicate effects not clearly different from 0.
linea <- which(rownames(alumni.sim$BUGSoutput$summary) == "deviance")
confid.inter <- alumni.sim$BUGSoutput$summary[-linea, c(3, 7)]
plot(seq_len(nrow(confid.inter)), confid.inter[, 1], col = "blue")
points(seq_len(nrow(confid.inter)), confid.inter[, 2], col = "red")
abline(h = 0, lty = 2)
# JAGS model 2: same regression as model 1 plus an additive territorial-area
# effect, sum(G * geo[i, ]): geo[i, ] is pupil i's row of area dummies, so
# exactly one G[j] (or none, for the reference area) is added per pupil.
# The m area coefficients G[j] share a common hierarchical precision tau_g,
# which partially pools the areas; the commented alternatives fix a vague
# prior / unit scale instead. dnorm uses precision, as everywhere in JAGS.
mod.alumni.2 <- "
model {
for (i in 1:n) {
y[i]~ dnorm(b0 + b1*x1[i] + b2*x2[i] + b3*x3[i] + b4*x4[i] + b5*x5[i] + b6*x6[i] + b7*x7[i] + b8*x8[i] + b9*x9[i] + sum(G*geo[i,]), tau_y)
}
b0 ~ dnorm(0, 1)
b1 ~ dnorm(1, 1)
b2 ~ dnorm(1, 1)
b3 ~ dnorm(1, 1)
b4 ~ dnorm(1, 1)
b5 ~ dnorm(0, 1)
b6 ~ dnorm(0, 1)
b7 ~ dnorm(0, 1)
b8 ~ dnorm(0, 1)
b9 ~ dnorm(0, 1)
tau_y ~ dgamma(0.001, 0.001)
sigma_y <- 1/sqrt(tau_y)
for(i in 1:m) {
G[i] ~ dnorm(0, tau_g)
# G[i] ~ dnorm(0, 0.001)
}
tau_g ~ dgamma(0.001, 0.001)
sigma_g <- 1/sqrt(tau_g)
# sigma_g=1
}
"
# MCMC settings for model 2 (area random effects).
Iter <- 5000
Burn <- 0    # NOTE(review): no burn-in is discarded -- usually some iterations are dropped
Chain <- 2
Thin <- 1    # thinning, to reduce the effect of autocorrelation in the draws

# Same data as model 1 plus the n x m area dummy matrix `geo` and its column
# count `m`. NOTE(review): `areas` is a data.frame; passing as.matrix(areas)
# would be safer for JAGS, although this ran as-is per the recorded output.
data.2 <- list(y = alumni$PMAT_6,
               x1 = alumni$PMAT_4, x2 = alumni$PLENG_4, x3 = alumni$PLENG_6,
               x4 = alumni$PANG_4, x5 = alumni$PANG_6,
               x6 = alumni$NATURALESA_Pública,
               x7 = alumni$`HABITAT_Fins a 10000`,
               x8 = alumni$`HABITAT_Més de 100000`,
               x9 = alumni$GENERE_H,
               geo = areas,
               n = nrow(alumni), m = m)
parameters.2 <- c("b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", "b8", "b9",
                  "sigma_y", "sigma_g", "G")

# Dispersed starting values for the two chains; G starts at zero in both.
initials.2 <- list(
  list(b0 = 3, b1 = 1, b2 = 0.1, b3 = 0, b4 = 0, b5 = 0, b6 = 0, b7 = 0,
       b8 = 0, b9 = 0, tau_y = 4, tau_g = 4, G = rep(0, m)),
  list(b0 = 2, b1 = 1.5, b2 = 0.2, b3 = 0, b4 = 1, b5 = 1, b6 = 1, b7 = 1,
       b8 = 1, b9 = 1, tau_y = 2, tau_g = 2, G = rep(0, m))
)

alumni.sim.2 <- jags(data.2, inits = initials.2, parameters.to.save = parameters.2,
                     n.iter = (Iter * Thin + Burn), n.burnin = Burn,
                     n.thin = Thin, n.chains = Chain,
                     model = textConnection(mod.alumni.2))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 4000
## Unobserved stochastic nodes: 24
## Total graph size: 99409
##
## Initializing model
traceplot(alumni.sim.2, mfrow = c(2,2), varname = parameters.2, col=c("black","red"), ask=F)
print(alumni.sim.2, digits=2)
## Inference for Bugs model at "5", fit using jags,
## 2 chains, each with 5000 iterations (first 0 discarded)
## n.sims = 10000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## G[1] -0.78 0.75 -2.47 -1.26 -0.66 -0.14 0.24 1
## G[2] 0.02 1.27 -2.62 -0.45 0.01 0.48 2.71 1
## G[3] -1.02 0.90 -2.99 -1.64 -0.89 -0.22 0.20 1
## G[4] -0.19 0.61 -1.63 -0.50 -0.06 0.17 0.86 1
## G[5] 0.04 0.56 -1.21 -0.22 0.05 0.37 1.12 1
## G[6] -1.12 1.03 -3.39 -1.82 -0.93 -0.21 0.20 1
## G[7] -0.38 0.60 -1.78 -0.73 -0.24 0.03 0.55 1
## G[8] -0.01 1.27 -2.78 -0.47 0.00 0.48 2.67 1
## G[9] -0.11 1.04 -2.52 -0.54 -0.02 0.37 1.96 1
## G[10] 0.65 0.93 -0.80 0.01 0.43 1.18 2.89 1
## G[11] -0.60 1.29 -4.01 -1.08 -0.24 0.08 1.27 1
## G[12] -0.91 0.76 -2.54 -1.42 -0.83 -0.25 0.13 1
## b0 -4.46 0.88 -6.20 -5.05 -4.47 -3.88 -2.76 1
## b1 0.56 0.02 0.53 0.55 0.56 0.58 0.60 1
## b2 0.19 0.02 0.14 0.17 0.19 0.20 0.23 1
## b3 0.01 0.02 -0.04 -0.01 0.01 0.02 0.05 1
## b4 0.35 0.02 0.32 0.34 0.35 0.36 0.38 1
## b5 -0.14 0.02 -0.19 -0.16 -0.14 -0.13 -0.10 1
## b6 -2.62 0.39 -3.40 -2.88 -2.61 -2.35 -1.85 1
## b7 0.59 0.52 -0.41 0.24 0.58 0.95 1.61 1
## b8 -0.66 0.46 -1.52 -0.98 -0.67 -0.36 0.27 1
## b9 4.37 0.37 3.64 4.11 4.37 4.62 5.10 1
## sigma_g 1.00 0.74 0.05 0.43 0.90 1.41 2.72 1
## sigma_y 12.12 0.14 11.85 12.03 12.12 12.21 12.39 1
## deviance 31310.47 11.56 31289.12 31302.35 31310.18 31318.08 31333.79 1
## n.eff
## G[1] 4100
## G[2] 10000
## G[3] 4200
## G[4] 10000
## G[5] 10000
## G[6] 6100
## G[7] 2000
## G[8] 6900
## G[9] 10000
## G[10] 3300
## G[11] 10000
## G[12] 2000
## b0 10000
## b1 10000
## b2 10000
## b3 10000
## b4 3200
## b5 9500
## b6 10000
## b7 10000
## b8 10000
## b9 10000
## sigma_g 2900
## sigma_y 10000
## deviance 6300
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 66.8 and DIC = 31377.3
## DIC is an estimate of expected predictive error (lower deviance is better).
# Credible-interval plot for model 2 (same layout as for model 1):
# blue = 2.5% bound, red = 97.5% bound, dashed line at zero for reference.
linea <- which(rownames(alumni.sim.2$BUGSoutput$summary) == "deviance")
confid.inter <- alumni.sim.2$BUGSoutput$summary[-linea, c(3, 7)]
plot(seq_len(nrow(confid.inter)), confid.inter[, 1], col = "blue")
points(seq_len(nrow(confid.inter)), confid.inter[, 2], col = "red")
abline(h = 0, lty = 2)
# JAGS model 3: model 1 plus an interaction term b10 * x3 * x9, i.e. the
# effect of the 6th-grade language score (x3) is allowed to differ by the
# GENERE_H dummy (x9). Priors as in model 1 (dnorm uses precision in JAGS);
# b10 gets a N(0, sd = 1) prior.
mod.alumni.3 <- "
model {
for (i in 1:n) {
y[i]~ dnorm(b0 + b1*x1[i] + b2*x2[i] + b3*x3[i] + b4*x4[i] + b5*x5[i] + b6*x6[i] + b7*x7[i] + b8*x8[i] + b9*x9[i] + b10*x3[i]*x9[i], tau_y)
}
b0 ~ dnorm(0, 1)
b1 ~ dnorm(1, 1)
b2 ~ dnorm(1, 1)
b3 ~ dnorm(1, 1)
b4 ~ dnorm(1, 1)
b5 ~ dnorm(0, 1)
b6 ~ dnorm(0, 1)
b7 ~ dnorm(0, 1)
b8 ~ dnorm(0, 1)
b9 ~ dnorm(0, 1)
b10 ~ dnorm(0, 1)
tau_y ~ dgamma(0.001, 0.001)
sigma_y <- 1/sqrt(tau_y)
}
"
# MCMC settings for model 3 (language x gender interaction).
Iter <- 5000
Burn <- 0    # NOTE(review): no burn-in is discarded -- usually some iterations are dropped
Chain <- 2
Thin <- 1    # thinning, to reduce the effect of autocorrelation in the draws

# Same data as model 1; the interaction x3*x9 is built inside the JAGS model.
data.3 <- with(alumni, list(
  y = PMAT_6,
  x1 = PMAT_4, x2 = PLENG_4, x3 = PLENG_6, x4 = PANG_4, x5 = PANG_6,
  x6 = NATURALESA_Pública,
  x7 = `HABITAT_Fins a 10000`, x8 = `HABITAT_Més de 100000`,
  x9 = GENERE_H,
  n = nrow(alumni)
))
parameters.3 <- c("b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", "b8", "b9",
                  "b10", "sigma_y")

# Dispersed starting values for the two chains, including the new b10.
initials.3 <- list(
  list(b0 = 3, b1 = 1, b2 = 0.1, b3 = 0, b4 = 0, b5 = 0, b6 = 0, b7 = 0,
       b8 = 0, b9 = 0, b10 = 0, tau_y = 4),
  list(b0 = 2, b1 = 1.5, b2 = 0.2, b3 = 0, b4 = 1, b5 = 1, b6 = 1, b7 = 1,
       b8 = 1, b9 = 1, b10 = 1, tau_y = 2)
)

alumni.sim.3 <- jags(data.3, inits = initials.3, parameters.to.save = parameters.3,
                     n.iter = (Iter * Thin + Burn), n.burnin = Burn,
                     n.thin = Thin, n.chains = Chain,
                     model = textConnection(mod.alumni.3))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 4000
## Unobserved stochastic nodes: 12
## Total graph size: 47942
##
## Initializing model
traceplot(alumni.sim.3, mfrow = c(2,2), varname = parameters.3, col=c("black","red"), ask=F)
print(alumni.sim.3, digits=2)
## Inference for Bugs model at "6", fit using jags,
## 2 chains, each with 5000 iterations (first 0 discarded)
## n.sims = 10000 iterations saved
## mu.vect sd.vect 2.5% 25% 50% 75% 97.5% Rhat
## b0 -4.05 0.89 -5.74 -4.63 -4.06 -3.47 -2.34 1
## b1 0.56 0.02 0.53 0.55 0.56 0.57 0.59 1
## b10 0.07 0.01 0.05 0.06 0.07 0.08 0.10 1
## b2 0.21 0.02 0.17 0.20 0.21 0.23 0.26 1
## b3 -0.03 0.02 -0.08 -0.05 -0.03 -0.02 0.01 1
## b4 0.35 0.02 0.32 0.34 0.35 0.36 0.38 1
## b5 -0.14 0.02 -0.18 -0.16 -0.14 -0.13 -0.10 1
## b6 -2.56 0.39 -3.32 -2.83 -2.56 -2.30 -1.82 1
## b7 0.53 0.50 -0.46 0.18 0.53 0.87 1.51 1
## b8 -0.69 0.39 -1.44 -0.96 -0.69 -0.42 0.08 1
## b9 -0.32 0.87 -2.00 -0.90 -0.33 0.26 1.37 1
## sigma_y 12.12 0.14 11.85 12.02 12.12 12.21 12.39 1
## deviance 31308.12 8.22 31293.33 31302.43 31307.69 31313.37 31325.38 1
## n.eff
## b0 10000
## b1 10000
## b10 10000
## b2 10000
## b3 10000
## b4 10000
## b5 10000
## b6 10000
## b7 5600
## b8 10000
## b9 10000
## sigma_y 10000
## deviance 10000
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 33.8 and DIC = 31341.9
## DIC is an estimate of expected predictive error (lower deviance is better).
# Credible-interval plot for model 3 (same layout as for models 1 and 2):
# blue = 2.5% bound, red = 97.5% bound, dashed line at zero for reference.
linea <- which(rownames(alumni.sim.3$BUGSoutput$summary) == "deviance")
confid.inter <- alumni.sim.3$BUGSoutput$summary[-linea, c(3, 7)]
plot(seq_len(nrow(confid.inter)), confid.inter[, 1], col = "blue")
points(seq_len(nrow(confid.inter)), confid.inter[, 2], col = "red")
abline(h = 0, lty = 2)